********************************************************************
***** DATA CREATION:  GENDER AND COLLABORATION (LORENZO DUCTOR, SANJEEV GOYAL AND ANJA PRUMMER)
***** Start: 30.06.2018
***** Last change:  10.09.2021
***** CREATED BY: Lorenzo Ductor (lductor@ugr.es)   
***** OBJECTIVE: Create the main database, auth7017fields5y.dta 
***** Input files: Allarticles7017ver2.dta, gender7017countryapi
********************************************************************

/**************************************************************************************/
/***********************Data at the article level***************************************/
/**************************************************************************************/


/* 1) Article-level file that feeds the network analysis */

use "Allarticles7017ver2.dta", clear

* Journal-level merge; journals present only in journalid.dta are discarded.
merge m:1 journalid using "journalid.dta", update
drop if _merge==2 /*39 journals have no recorded articles */
drop _merge

* Journal-year merge. Rows that exist only in ais.dta presumably carry no
* articleid and are removed further down.
merge m:1 journalid year using "ais.dta"
keep articleid year nauthors journalid auth1 auth2 auth3 auth4 auth5 auth6 auth7 prod Impact ai10

* Missing journal scores are set to zero before renaming.
foreach score in Impact ai10 {
	replace `score'=0 if missing(`score')
}
rename Impact jcr
rename ai10 ais

* Each score divided by the number of authors of the article.
foreach score in prod jcr ais {
	gen `score'd=`score'/nauthors
}

order articleid auth1-auth7 year journalid 
drop if articleid==.

* Discard articles whose last listed author slot is empty.
forvalues slot=1/7 {
	drop if auth`slot'==. & nauthors==`slot'
}
export delimited using "network7017.csv", replace



/*2) JEL codes*/

/* 1 digit: share of an article's JEL codes that falls in each one-letter category */
	* The clear option is required: the article data left in memory by the
	* previous section were modified, so a plain -use- stops with r(4).
	use "Allarticles7017ver2.dta", clear
	keep articleid jel*

	* First character of each of the first seven JEL codes.
	* NOTE(review): only jel1-jel7 are read here although the two-digit
	* section reads jel1-jel20 -- confirm this is intended.
	forvalues k=1/7 {
		gen letter`k'=substr(jel`k',1,1)
	}

	* Number of non-empty codes among those seven slots.
	gen sum=0
	forvalues k=1/7 {
		replace sum=sum+1 if !missing(letter`k')
	}

	* For every capital letter: number of matching codes divided by the number
	* of codes. The share is missing when the article has no code (sum is 0).
	foreach cat in `c(ALPHA)' {
		gen `cat'=0
		forvalues k=1/7 {
			replace `cat'=`cat'+1 if letter`k'=="`cat'"
		}
		replace `cat'=`cat'/sum
	}

	keep articleid `c(ALPHA)'
	save jel1d7017,replace

/* 2 digits: share of an article's JEL codes that falls in each letter+digit category */

	use "Allarticles7017ver2.dta", clear
	keep articleid jel*

	* First two characters of each of the twenty JEL code slots.
	forvalues k=1/20 {
		gen letter`k'=substr(jel`k',1,2)
	}

	* Number of non-empty codes per article (denominator of the shares).
	gen sum=0
	forvalues k=1/20 {
		replace sum=sum+1 if !missing(letter`k')
	}

	* Count the codes in every letter+digit cell and turn the count into a share.
	* The count runs over all twenty slots, i.e. over the same codes as the
	* denominator. It previously stopped at slot 7, which understated the
	* shares of articles carrying more than seven codes.
	foreach cat in `c(ALPHA)' {
		forvalues n=1/9 {
			gen `cat'`n'=0
			forvalues k=1/20 {
				replace `cat'`n'=`cat'`n'+1 if letter`k'=="`cat'`n'"
			}
			replace `cat'`n'=`cat'`n'/sum
		}
	}

* Retain only the cells listed below; every other generated cell is discarded
* by this keep, so no separate drop statements are needed.
keep articleid A1 A2 A3 B1 B2 B3 B4 B5 C1 C2 C3 C4 C5 C6 C7 C8 C9 D1 D2 D3 D4 D5 D6 D7 D8 D9 E1 E2 E3 E4 E5 E6 E7 F1 F2 F3 F4 F5 F6 G1 G2 G3 G4 H1 H2 H3 H4 H5 H6 H7 H8 I1 I2 I3 J1 J2 J3 J4 J5 J6 J7 J8 K1 K2 K3 K4 L1 L2 L3 L4 L5 L6 L7 L8 L9 M1 M2 M3 M4 M5 N1 N2 N3 N4 N5 N6 N7 N8 N9 O1 O2 O3 O4 O5 P1 P2 P3 P4 P5 Q1 Q2 Q3 Q4 Q5 R1 R2 R3 R4 R5 Y1 Y2 Y3 Y5 Y6 Y7 Y8 Y9 Z1 Z2 Z3
save jel2d7017,replace


/*3) Combine the article file with the one- and two-digit JEL shares*/

import delimited "network7017.csv", clear

* Only articles found on both sides of each merge are retained, and no
* _merge indicator is left behind.
foreach jelfile in jel1d7017 jel2d7017 {
	merge 1:1 articleid using `jelfile', keep(match) nogenerate
}
save articles7017, replace





/**************************************************************************************/
/***********************Data at the author level***************************************/
/**************************************************************************************/

/*1) GENDER: Needs to run "create_gender_countryapi.do" first*/


/*Adding gender to the article database*/
set more off
use articles7017, clear
drop A-Z3

* One row per article-author slot; n is the original author position (1-7).
bys articleid: gen t=_n
reshape long auth, i(articleid t) j(n)
drop if auth==.

* Attach the gender indicator of every author; rows that exist only in the
* gender file have no articleid and are removed.
merge m:1 auth using gender7017countryapi
drop if articleid==.
keep articleid auth femaleapi nauthors n

* Keep one row per article-author (some articles list the same author id twice).
bysort articleid auth (n): keep if _n==1

* Renumber the remaining authors in their original position order. Without
* an explicit within-article sort key Stata orders tied observations
* arbitrarily, so the author stored as auth1, auth2, ... could change
* from one run to the next.
bysort articleid (n): gen t=_n
drop n
reshape wide auth femaleapi, i(articleid) j(t)

* Discard articles whose last author slot is empty after the renumbering.
forvalues k=2/7 {
	drop if auth`k'==. & nauthors==`k'
}

* Bring back the article-level variables (master values win for shared names).
joinby articleid using articles7017, unmatched(master)
drop _merge
export delimited using "networkgender7017", replace


/* 2) STRENGTH: */

* For every end year i from 1974 to 2017, build author-level tie-strength
* measures from the articles published in the window i-4 to i and save one
* file per year.
forvalues i=1974/2017{
use articles7017, clear
* Placeholder for the window's end year; filled in just before saving.
gen year2=.
* Keep the five-year window ending in year i.
drop if year<`i'-4
keep if year<`i'+1
* Drop articles with an empty author slot at any position up to nauthors.
drop if auth2==. & nauthors>=2
drop if auth3==. & nauthors>=3
drop if auth4==. & nauthors>=4
drop if auth5==. & nauthors>=5
drop if auth6==. & nauthors>=6
drop if auth7==. & nauthors>=7
keep articleid year auth1 auth2 auth3 auth4 auth5 auth6 auth7 nauthors year2 
* Long format: one row per article-author.
bys articleid: gen t=_n
reshape long auth, i(articleid t) j(n)
drop if auth==.
bys articleid auth: gen np=_n /*checking articles with duplicated auth id*/
bys articleid auth: gen sn=_N
keep if np==1 /* 10 articles with duplicated authid, keeping one*/
* snpapers5 = number of article rows of the author inside the window
* (computed before single-authored articles are removed).
gen x=1
drop n sn
bys auth: egen snpapers5=sum(x)
drop if nauthors<2
* Build author-coauthor pairs: every article-author row is replicated nauthors
* times and the k-th copy is paired with the author found at position
* nauthors*k inside the article, i.e. the last copy of the k-th author block.
* NOTE(review): this indexing assumes the article has exactly nauthors distinct
* author rows; when it does not, the index can fall outside the article and
* the resulting missing pair is removed by the newid filter below.
expand nauthors
sort articleid auth
by articleid auth: gen numid2 = _n
by articleid: gen auth2 = auth[nauthors * numid2]
* Remove the pairing of an author with himself or herself.
drop if auth==auth2
* degree = number of pair rows of the author (not part of the saved file).
bys auth: gen d=_n
bys auth: egen degree=max(d)
* intensity = number of rows of the pair; intensityd divides it by the number
* of coauthors of the row's article.
egen newid = group(auth auth2)
bys newid: gen intensity = _N
bys newid: gen intensityd =intensity/(nauthors-1)
drop if newid==.

keep auth auth2 newid intensity intensityd snpapers5 year2 degree
duplicates drop
* Author-level mean and sum of the pair intensities, plus the means divided
* by the author's number of papers in the window.
bys auth: egen strength=mean(intensity)
bys auth: egen strengths=sum(intensity)
bys auth: egen strengthd=mean(intensityd)
bys auth: egen strengthsd=sum(intensityd)
bys auth: gen strengthppaper=strength/snpapers5
bys auth: gen strengthppaperd=strengthd/snpapers5
keep auth strength strengthppaper strengths strengthd strengthppaperd strengthsd year2
duplicates drop
replace year2=`i'
save strength`i',  replace 
}

* Stack the yearly files into a single author-year file.
use strength1974, clear
forvalues i=1975/2017{
append using strength`i'
}
rename year2 year
* NOTE(review): the next rename maps auth onto itself and changes nothing.
rename auth auth
save strength7017, replace


/*3) Merging all the network data*/
set more off

* Turn every yearly network csv into a Stata file with harmonised names.
forvalues yr = 1974/2017 {
	insheet using "network`yr'_5y.csv",clear
	drop v1
	gen year=`yr'
	rename vgauth auth
	rename vgdeg degree
	rename vgdeg2 degree2
	rename vgtran clustering
	order auth year
	sort auth year
	save network`yr'_5y,replace
}

* Stack the yearly files and drop authors with a degree of zero.
use network1974_5y, clear
forvalues yr = 1975/2017 {
	append using network`yr'_5y
}
drop if degree==0
save network_5y, replace


/*4) CREATING EXPERIENCE OF COAUTHORS*/

import delimited "networkgender7017.csv", clear
keep articleid auth1 femaleapi1 auth2 femaleapi2 auth3 femaleapi3 auth4 femaleapi4 auth5 femaleapi5 auth6 femaleapi6 auth7 femaleapi7 nauthors year journalid prod jcr ais prodd jcrd aisd

* Long format: one row per article-author slot, n being the author position.
bys articleid: gen t=_n
reshape long auth femaleapi, i(articleid t) j(n)
drop if auth==.

* Year of the first recorded article of every author.
bys auth: egen myear=min(year)
drop t

* Renumber the authors of each article in their original position order.
* Numbering without a within-article sort key leaves the order of tied
* observations to chance, so the slot of each author was not reproducible.
* The former duplicates drop is omitted: the sequence number is unique
* within an article, so no two rows could ever be identical.
bysort articleid (n): gen t=_n
drop n
reshape wide auth femaleapi myear, i(articleid) j(t)
export delimited "networkgendert7017.csv", replace
/*Adding coauthors' characteristics, use network code in R: "netprodbygender.R" and datafile "networkgendert7017.csv"*/

/*After the R program has finished, this part converts and stacks its yearly output files*/
set more off

forvalues yr = 1974/2017 {
	insheet using "networkprod`yr'_5y.csv",clear
	drop v1
	gen year=`yr'

	* Renames are applied one by one, in this order.
	rename vgauth auth
	rename vgdeg degreex 
	rename vgnetprodmale netprodm
	rename vgnetprodfemale netprodf
	rename vgnetprodm netprodmis
	rename vgnetaismale netprodaism
	rename vgnetaisfemale netprodaisf
	rename vgpapersmale neighpapersm
	rename vgpapersfemale neighpapersf
	rename vgpapersm neighpapersmis
	rename vgnmaleco namecom
	rename vgnfemaleco namecof
	rename vgnmco namecomis
	rename vgtmale avgtcom
	rename vgtfemale avgtcof
	rename vgtm avgtcomis

	order auth year
	sort auth year
	save neighchar`yr'_5y,replace
}

* Stack the yearly files and drop authors whose degreex is zero.
use neighchar1974_5y, clear
forvalues yr = 1975/2017 {
	append using neighchar`yr'_5y
}
drop if degreex==0
save neighchar7017_5y, replace




/*5) creating coauthorship and publications by TIranking*/
set more off
import delimited "networkgender7017.csv", clear
* NOTE(review): a negative author count looks impossible; confirm the intended threshold.
drop if nauthors<0
* Discard articles whose last author slot is empty.
forvalues k=2/7 {
	drop if auth`k'==. & nauthors==`k'
}

* Journal-level information (TIranking and journalname are used below).
joinby journalid using "journal.dta", unmatched(master)
drop _merge

* Smallest femaleapi value among the authors of the article.
egen genderpaper=rowmin(femaleapi1 femaleapi2 femaleapi3 femaleapi4 femaleapi5 femaleapi6 femaleapi7)
bys TIranking: tab genderpaper

* Decade dummies (1 inside the decade, 0 otherwise).
gen decade70=1 if year<1980
gen decade80=1 if year>1979 & year<1990
gen decade90=1 if year>1989 & year<2000
gen decade00=1 if year>1999 & year<2010
gen decade10=1 if year>=2010
foreach dec in decade70 decade80 decade90 decade00 decade10 {
	replace `dec'=0 if missing(`dec')
}

* Tabulations by decade, first within ranking and then pooled.
foreach dec in decade70 decade80 decade90 decade00 decade10 {
	bys TIranking: tab genderpaper if `dec'==1
}


foreach dec in decade70 decade80 decade90 decade00 decade10 {
	tab genderpaper if `dec'==1
}

* Co-authored paper indicator: 1 with more than one author, 0 otherwise.
gen co=1 if nauthors>1
replace co=0 if nauthors<=1

* Same indicator restricted to each ranking class (missing outside the class).
* Journals without a ranking are labelled C before the C indicator is built.
foreach rank in AA A B {
	gen co`rank'=1 if nauthors>1 & TIranking=="`rank'" 
	replace co`rank'=0 if nauthors<=1 & TIranking=="`rank'" 
}

replace TIranking="C" if TIranking==""

gen coC=1 if nauthors>1 & TIranking=="C" 
replace coC=0 if nauthors<=1 & TIranking=="C" 


* Same indicator for the three named journals. The journal test is wrapped in
* parentheses because & is evaluated before |. Without them every paper in the
* second and third journal was first set to 1 and then overwritten with 0,
* whatever its number of authors.
gen coBS=1 if nauthors>1 & (journalname=="American-Economic-Review" | journalname=="Journal-of-Political-Economy" | journalname=="Quarterly-Journal-of-Economics")
replace coBS=0 if nauthors<=1 & (journalname=="American-Economic-Review" | journalname=="Journal-of-Political-Economy" | journalname=="Quarterly-Journal-of-Economics")

/*Creating the panel*/
* The one- and two-digit field columns are not needed in this file.
drop a a1 a2 a3 b b1 b2 b3 b4 b5 c c1 c2 c3 c4 c5 c6 c7 c8 c9 d d1 d2 d3 d4 d5 d6 d7 d8 d9 e e1 e2 e3 e4 e5 e6 e7 f f1 f2 f3 f4 f5 f6 g g1 g2 g3 g4 h h1 h2 h3 h4 h5 h6 h7 h8 i i1 i2 i3 j j1 j2 j3 j4 j5 j6 j7 j8 k k1 k2 k3 k4 l l1 l2 l3 l4 l5 l6 l7 l8 l9 m m1 m2 m3 m4 m5 n n1 n2 n3 n4 n5 n6 n7 n8 n9 o o1 o2 o3 o4 o5 p p1 p2 p3 p4 p5 q q1 q2 q3 q4 q5 r r1 r2 r3 r4 r5 s t u v w x y y1 y2 y3 y5 y6 y7 y8 y9 z z1 z2 z3

* One row per article-author: t identifies the article row for reshape.
gen t=_n
reshape long auth femaleapi, i(t) 
sort auth year 
drop t _j 
drop if auth==.

* Papers and co-authored papers of the author in the year.
bysort auth year: gen snpapers=_N
bysort auth year: egen ncopapers=sum(co)

drop prod prodd journalid articleid
drop ais aisd jcrd jcr mn decade10 co since70 keele Impact Impact5y genderpaper decade70 decade80 decade90 decade00

/*Adding number of papers per ranking*/
* x is a scratch 0/1 flag that is rebuilt for every class.
foreach rank in AA A B C {
	gen x=TIranking=="`rank'"
	bysort auth year: egen snpapers`rank'=sum(x)
	drop x
}
gen x=journalname=="American-Economic-Review" | journalname=="Journal-of-Political-Economy" | journalname=="Quarterly-Journal-of-Economics"
bysort auth year: egen snpapersbs=sum(x)
drop x

* Mean number of authors per paper, over all papers and over co-authored
* papers only. The conditional mean exists only on the rows that satisfy the
* condition, so its maximum spreads it to every row of the author-year.
bysort auth year: egen mnauthors=mean(nauthors)
bysort auth year: egen mncoauthors=mean(nauthors) if nauthors>1
bysort auth year: egen mmncoauthors=max(mncoauthors)

* Same two means inside every ranking class.
foreach rank in AA A B C {
	bysort auth year: egen mnauthors`rank'=mean(nauthors) if TIranking=="`rank'"
	bysort auth year: egen mmnauthors`rank'=max(mnauthors`rank') 
	bysort auth year: egen mncoauthors`rank'=mean(nauthors) if nauthors>1 & TIranking=="`rank'"
	bysort auth year: egen mmncoauthors`rank'=max(mncoauthors`rank') 
	drop mnauthors`rank' mncoauthors`rank'
}

* Share of co-authored papers inside every class and inside the three named journals.
bysort auth year: egen ncopaperst5=sum(coAA)
gen coauthorshipAA=ncopaperst5/snpapersAA

foreach rank in A B C {
	bysort auth year: egen ncopapers`rank'=sum(co`rank')
	gen coauthorship`rank'=ncopapers`rank'/snpapers`rank'
}

bysort auth year: egen ncopapersbs=sum(coBS)
gen coauthorshipBS=ncopapersbs/snpapersbs

/*adding TIranking, best publication in the ranking per year*/
* Numeric rank of the outlet: AA is 3, A is 2, B is 1 and any other value of
* TIranking is 0.
gen ti=3 if TIranking=="AA"
replace ti=2 if TIranking=="A"
replace ti=1 if TIranking=="B"
* The zero fill has to come before the maximum is taken. It used to come
* after it, when it could no longer affect Titop, and only outlets labelled
* D were given a zero, so author-years that contain only outlets outside
* AA, A and B ended up with a missing Titop.
replace ti=0 if missing(ti)
bys auth year: egen Titop=max(ti)


* Keep the author-year variables and collapse to one row per author-year.
keep auth year femaleapi coauthorshipAA coauthorshipA coauthorshipB coauthorshipC coauthorshipBS ncopapers snpapers Titop snpapersAA snpapersA snpapersB snpapersC snpapersbs mmncoauthorsAA mmncoauthors mmncoauthorsA mmncoauthorsB mmncoauthorsC mnauthors mmnauthorsAA mmnauthorsA mmnauthorsB mmnauthorsC
replace snpapers=0 if missing(snpapers)
duplicates drop
xtset auth year, yearly
save coauth7017, replace


/*6) Creating the research output variables*/
import delimited "networkgender7017.csv", clear
save auth7017, replace

use auth7017,clear

/*Creating the panel*/
* The imported column t is replaced by a row identifier for the reshape.
drop t
gen t=_n
reshape long auth femaleapi, i(t) 
sort auth year 
drop t _j 
drop if auth==.

/*Aggregating publications by year using KY index, JCR in 2017, and AIS*/
foreach q in prodd prod jcrd jcr ais aisd {
	bysort auth year: egen s`q'=sum(`q')
}

bysort auth year: egen mprod=max(prod) /*paper of the best quality*/
bysort auth year: egen mnauthors=mean(nauthors) /*average number of authors per year*/
bysort auth year: gen snpapers=_N /*total number of papers*/

* One-letter field columns: the yearly sum is stored under the name with an s prefix.
foreach fld of varlist a-z {
	bysort auth year: egen s`fld'=sum(`fld')
	drop `fld'
}

* Letter+digit field columns: the yearly sum takes over the original name.
foreach fld of varlist a1-z3 {
	bysort auth year: egen s`fld'=sum(`fld')
	drop `fld'
	rename s`fld' `fld'
}

* Drop the article-level variables and collapse to one row per author-year.
drop prod prodd ais aisd nauthors journalid articleid jcr jcrd
duplicates drop
xtset auth year, yearly

* First and last year in which the author appears.
bysort auth: egen ystart=min(year)
bysort auth: egen yend=max(year)
save auth7017, replace


/* generate a list of all author idcodes in the data */
collapse (mean) year, by(auth)
keep auth
save authorlist, replace

/* create a file with each author by year, and identify the beginning
   and end of publishing career */
set more off
clear 
* 49 observations holding the years 1969 to 2017.
set obs 49
gen year=_n+1968
tab year

* Every author paired with every year.
cross using authorlist

* Attach the author-year data. The current merge syntax replaces the
* deprecated form that lists the key variables before using, and it stops
* with an error if either side is not unique on auth and year.
sort auth year
merge 1:1 auth year using auth7017
tab _merge
drop _merge
compress

*sum

* Spread the first and last publication year to all rows of the author and
* keep only the years between them.
egen mystart=mean(ystart), by(auth)
drop if year<mystart

egen myend=mean(yend), by(auth)

drop ystart yend
rename mystart ystart
rename myend yend
drop if year>yend
save auth7017, replace



/*Adding gender, fields and strength*/
set more off
use auth7017.dta, clear

* Yearly network measures of the author.
joinby auth year using network_5y, unmatched(master)
drop _merge
label variable degree "Degree"
label variable degree2 "Degree of order 2"

* Spread the gender indicator to every year of the author.
bys auth: egen femalem=max(femaleapi)
drop femaleapi
rename femalem femaleapi
sort auth year
order auth year femaleapi

* Yearly tie-strength measures of the author.
joinby auth year using strength7017, unmatched(master)
drop _merge

/*Career time: years elapsed since the author's first publication year, starting at 1*/
* The variable is written out in full; the previous spelling ystar only
* worked through variable-name abbreviation.
gen t=(year-ystart)+1

gen gc=1 if degreegc!=.
replace gc=0 if missing(gc)

xtset auth year, yearly

* Years without publications count as zero; 5-year and 3-year rolling sums
* (missing while the required lags are not available).
foreach i in sprodd sprod sjcrd sjcr sais saisd snpapers{
replace `i'=0 if missing(`i')
gen `i'5y=(`i'+L.`i'+L2.`i'+L3.`i'+L4.`i')
gen `i'3y=(`i'+L.`i'+L2.`i')
} 


* Running totals over the career. The year sort key makes the chronological
* order explicit instead of relying on the order left by xtset.
bysort auth (year): gen cprodd=sum(sprodd)
bysort auth (year): gen cprod=sum(sprod)
bysort auth (year): gen totpapers=sum(snpapers)

gen lbet=log(betweenness+1)

* NOTE(review): snpapers5 is not created in this section; it presumably
* resolves to snpapers5y through variable-name abbreviation -- confirm and
* spell the name out.
replace degree=0 if missing(degree) & snpapers5!=0


label variable clustering "Clustering"
label variable femaleapi "femaleapi"
label variable closeness "Closeness"
label variable betweenness "Betweenness"
label variable eigenvector "Eigenvector"


* Past totals (up to five years ago) and recent totals (the last five years).
gen cprodl5=L5.cprod
gen cprod5=cprod-cprodl5
gen cproddl5=L5.cprodd
gen cprodd5=cprodd-cproddl5
gen tpapersl5=L5.totpapers
gen tpapers5=totpapers-tpapersl5
bys auth: egen mt=max(t)
label variable cprodl5 "Past Output"
label variable cprod5 "Recent Output"
label variable cproddl5 "Past Output discounted"
label variable cprodd5 "Recent Output discounted"
label variable cprodd "Recent and past Output discounted"
label variable tpapersl5 "Past number of publications"
label variable tpapers5 "Recent number of publications"
label variable gc "Giant component Dummy"
save auth7017fields, replace


/*adding coauthors productivity and experience*/
use auth7017fields, clear
joinby auth year using neighchar7017_5y, unmatched(master)
save auth7017fields, replace

/*Creating 5-years field fixed effects: do only once*/ 
use auth7017fields, clear
set more off

* Replace every field column by its sum over the current and the four
* previous years (rowtotal treats missing lags as zero). The one-letter
* columns are processed first, then the letter+digit columns.
foreach fld of varlist sa-sz a1-z3 {
	gen `fld'l=L.`fld'
	forvalues k=2/4 {
		gen `fld'l`k'=L`k'.`fld'
	}
	egen `fld'5y=rowtotal(`fld' `fld'l `fld'l2 `fld'l3 `fld'l4)
	drop `fld' `fld'l `fld'l2 `fld'l3 `fld'l4 
	rename `fld'5y `fld'
}

* Number of papers over the same window; the yearly count itself is dropped.
gen snpapersl=L.snpapers
forvalues k=2/4 {
	gen snpapersl`k'=L`k'.snpapers
}
egen snpapers5yb=rowtotal(snpapers snpapersl snpapersl2 snpapersl3 snpapersl4)
drop snpapers snpapersl snpapersl2 snpapersl3 snpapersl4 

* Express every field column per paper published in the window.
foreach fld of varlist sa-sz a1-z3 {
	replace `fld'=`fld'/snpapers5yb 
}
save auth7017fields5y, replace



/*Adding coauthors' characteristics*/
* Load the 5-year file and remove the match indicator stored in it, so that
* the next joinby can create its own _merge.
use auth7017fields5y, clear
drop _merge
* Author-year coauthorship variables; unmatched author-years of the master are kept.
joinby auth year using coauth7017, unmatched(master)
drop _merge
/*Adding citations: citations only available till 2011, only in appendix*/
joinby auth year using "authcites.dta", unmatched(master)
drop _merge
* The file is saved without a _merge variable.
save auth7017fields5y, replace



/***********REDEFINING AIS AND REMOVING IRRELEVANT VARIABLES********************/
/*Redefining AIS to an index from 0 to 100*/
use auth7017fields5y, clear
* The file was last saved without a _merge variable, so an unconditional drop
* stops the do-file with r(111). capture removes the variable when it is
* present and does nothing otherwise.
capture drop _merge
/*removing the AIS indexes defined from 0 to 1 and other irrelevant variables*/
drop sjcrd5y sjcrd3y sjcr5y sjcr3y betweenness closeness eigenvector degree2 degreegc clusteringgc gc degreex surname suffix firstname initials pfemale pmale strength strengthd strengthsd strengthppaperd sjcr sjcrd mnauthors mprod mt snpapers5yb sais5y sais3y saisd5y saisd3y sais saisd strengths
drop mmncoauthors mmnauthorsAA mmncoauthorsAA mmnauthorsA mmncoauthorsA mmnauthorsB mmncoauthorsB mmnauthorsC mmncoauthorsC coauthorshipAA coauthorshipA coauthorshipB coauthorshipC coauthorshipBS
drop netprodm netprodf netprodmis neighpapersm neighpapersf neighpapersmis
/*adding the AIS defined from 0 to 100*/
joinby auth year using "aisauthyear.dta", unmatched(master)
drop _merge
save auth7017fields5y, replace




/**************CREATING VARIABLES FOR THE EMPIRICAL ANALYSIS****************/

use auth7017fields5y,clear
xtset auth year, yearly

* Papers per ranking class: zero when missing, then the sum over the current
* and the four previous years.
foreach v in snpapersAA snpapersA snpapersB {
	replace `v'=0 if missing(`v')
	gen `v'5y=(`v'+L.`v'+L2.`v'+L3.`v'+L4.`v')
}

* Missing one-letter field shares become zero.
foreach v of varlist sa-sz {
	replace `v'=0 if missing(`v')
}

* Standardised versions, using the mean and standard deviation of the sample in memory.
foreach v in degree strengthppaper clustering cproddl5 {
	summarize `v'
	gen `v'z=(`v'-r(mean))/r(sd)
}

set more off
* Co-authored papers: zero when missing, then the five-year sum.
replace ncopapers=0 if missing(ncopapers)
gen ncopapers5y=(ncopapers+L.ncopapers+L2.ncopapers+L3.ncopapers+L4.ncopapers)

/*SELECTION*/
drop if ystart<1974 /*removing first cohorts, since career time is unknown*/


* Share of co-authored papers over the five-year window.
gen coauthorship5y=ncopapers5y/snpapers5y

/*Average coauthor experience, built from the group averages weighted by group shares*/
gen ncoauthors=namecom+namecof+namecomis
gen fmale=namecom/ncoauthors
gen ffemaleapi=namecof/ncoauthors
gen nmissing=namecomis/ncoauthors

* A group without coauthors contributes zero experience and a zero share.
replace avgtcom=0 if namecom==0
replace fmale=0 if namecom==0

replace avgtcof=0 if namecof==0
replace ffemaleapi=0 if namecof==0

replace avgtcomis=0 if namecomis==0
replace nmissing=0 if namecomis==0

gen avgcot=avgtcom*fmale+avgtcof*ffemaleapi+avgtcomis*nmissing

/*Average coauthor output*/

* Rescale so that the AIS index runs from 0 to 100 instead of 0 to 1.
foreach g in m f {
	replace netprodais`g'=netprodais`g'*100
}

* Output per coauthor by group, followed by the logs.
foreach g in m f {
	gen avnetprodais`g'=(netprodais`g')/nameco`g'
}
foreach g in m f {
	gen lavnetprodais`g'=log(avnetprodais`g'+1)
}
gen avnetprodais=(netprodaisf+netprodaism)/(namecom+namecof)
gen lavnetprodais=log(avnetprodais+1)

/*Log variables*/
foreach v in netprodaisf netprodaism avgtcof avgtcom avgcot {
	gen l`v'=log(`v'+1)
}

/*Missing dummies, then zero fill, wherever snpapers5y is available*/
foreach v in degree strengthppaper clustering coauthorship5y lnetprodaisf lnetprodaism lavgtcof lavgtcom lavgcot avgcot lavnetprodais lavnetprodaisf lavnetprodaism { 
	gen d`v'=missing(`v') if snpapers5y!=.
	replace `v'=0 if missing(`v') & snpapers5y!=.
}


* Rolling sums of the AIS-based output over the current and previous years
* (windows of 3 and 5 to 9 years); years without a value count as zero.
foreach i in sais saisd{
replace `i'=0 if missing(`i')
gen `i'5y=(`i'+L.`i'+L2.`i'+L3.`i'+L4.`i')
gen `i'3y=(`i'+L.`i'+L2.`i')
gen `i'6y=(`i'+L.`i'+L2.`i'+L3.`i'+L4.`i'+L5.`i')
gen `i'7y=(`i'+L.`i'+L2.`i'+L3.`i'+L4.`i'+L5.`i'+L6.`i')
gen `i'8y=(`i'+L.`i'+L2.`i'+L3.`i'+L4.`i'+L5.`i'+L6.`i'+L7.`i')
gen `i'9y=(`i'+L.`i'+L2.`i'+L3.`i'+L4.`i'+L5.`i'+L6.`i'+L7.`i'+L8.`i')
} 
/*Creating cumulative output*/
* Running totals must be accumulated in chronological order, so year is made
* an explicit sort key instead of relying on the order in memory.
bysort auth (year): gen caisd=sum(saisd)
bysort auth (year): gen cais=sum(sais)
bysort auth (year): gen tpapersAA=sum(snpapersAA)

/*Creating past output based on AIS*/
gen caisdl5=L5.caisd
/*Creating recent output based on AIS*/
gen caisd5=caisd-caisdl5

/*Creating past output based on top 5 publications*/
gen tpapersAAl5=L5.tpapersAA
/*Creating recent output based on top 5*/
gen tpapersAA5=tpapersAA-tpapersAAl5

/* TI ranking papers*/
gen snpapersti=snpapersAA+snpapersA+snpapersB
gen snpapersti5y=snpapersAA5y+snpapersA5y+snpapersB5y

bysort auth (year): gen tpapersti=sum(snpapersti)
gen tpaperstil5=L5.tpapersti
gen tpapersti5=tpapersti-tpaperstil5
/*Logs of recent and past output*/
/*KY index*/
gen  lcprodd5=log(cprodd5 +1)
gen lcproddl5=log(cproddl5+1)

/*AIS index*/
gen lcaisdl5=log(caisdl5+1)
gen lcaisd5=log(caisd5+1)
gen lsaisd5y=log(saisd5y+1)

/*Replacing missing values for zero*/
replace lcaisdl5=0 if missing(lcaisdl5)
replace tpapersAAl5=0 if missing(tpapersAAl5)

/*Lagged networks*/
/*creating lagged network variables and replacing missing values by 0*/
foreach i in degree strengthppaper clustering lavnetprodais lavnetprodaisf lavnetprodaism lavgtcof lavgtcom lavgcot avgcot{
	gen `i'l5=L5.`i'
}
/*adding the lagged missing dummies*/
foreach i in   degree strengthppaper clustering lavnetprodais lavnetprodaisf lavnetprodaism lavgtcof lavgtcom lavgcot avgcot{
	gen d`i'l5=1 if missing(`i'l5) 
	replace d`i'l5=0 if missing(d`i'l5)
    replace `i'l5=0 if missing(`i'l5)
}


/*Variable labels*/
label var cprodl5 "Past Output"
label var cprod5 "Recent Output"
label var cproddl5 "Past Output discounted"
label var cprodd5 "Recent Output discounted"
label var cprodd "Recent and past Output discounted"
label var tpapersl5 "Past number of publications"
label var tpapers5 "Recent number of publications"
label var tpapersAAl5 "Past number of top 5 publications"
label var tpapersAA5 "Recent number of top 5  publications"
label var caisdl5 "Past Output discounted"
label var caisd5 "Recent Output discounted"
label var clustering "Clustering"
label var femaleapi "femaleapi"

/*Remove the intermediate variables and store the final file*/
* Coauthor output variables, their logs, missing dummies and lags.
drop netprodaism netprodaisf avnetprodaism avnetprodaisf lavnetprodaism lavnetprodaisf avnetprodais lnetprodaisf lnetprodaism dlnetprodaisf dlnetprodaism dlavnetprodaisf dlavnetprodaism lavnetprodaisfl5 lavnetprodaisml5 dlavnetprodaisfl5 dlavnetprodaisml5 lbet 
* Coauthor experience variables by group, their logs, missing dummies and lags.
drop avgtcom avgtcof avgtcomis lavgtcof lavgtcom lavgcot dlavgtcof dlavgtcom dlavgcot lavgtcofl5 lavgtcoml5 lavgcotl5 dlavgtcofl5 dlavgtcoml5 dlavgcotl5
* Coauthor counts and group shares.
drop ncoauthors fmale ffemaleapi nmissing
save auth7017fields5y, replace 
